import scipy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from heapq import heappush, heappop, heapify
sns.set(color_codes=True)
%matplotlib inline
### LGA TAXI SIMULATION! ###
class Rider:
"""
A Rider (Passenger) class
"""
def __init__(self, rider_id=None, arrival_time=None, patience=20):
"""
:param start_time (float): global time when passenger entered queue
:param car_time (float): global time when passenger got into car
:param patience (float): threshold for waiting for a car before taking an alternative
:param pay (int): amount paid
"""
self.rider_id = rider_id # TODO: probably don't need that
self.arrival_time = arrival_time
# rider patience is normalized around 15, but cutoff at 20
self.patience = min(np.random.normal(15,5), 20)
# destination is randomly set between NYC boroughs directions
# roughly by proportions of residents.
# DESTINATION: to be used later in aggregation and weighting trip times
self.destination = np.random.choice(a=[1, 2, 3, 4], p=[0.5, 0.2, 0.2, 0.1])
class Taxi:
"""
A Vehicle class
"""
def __init__(self, queue_start_time=None): # taxi_id
"""
Initializing paramters of a Taxi
:param arrive_time (float): time arrived at queue
:param pickup_time (float):
:param seats (int):
:param location (int: 0,1): 0 = outside ot LGA / queue, 1= LGA / queue
"""
# self.id = taxi_id
# self.max_seats = 1 # num seats it needs to fill before departing
self.num_riders = 0 # num of current riders
self.destination = None
# waiting and costs
self.queue_start_time = queue_start_time
self.wage_rate = 30 # drivers wage is 30$ average per hour #VARIABLE
self.wages_paid = 0 # how much have we paid driver (so far)
self.earnings = 0 # starts with 0 earnings: money earned from taxi trips
self.busy = 0 # handle if taxi is busy or not
self.waiting_time = 0 # queue waiting time we'll pay for
self.idle_trip_times_total = 0 # idle trip time we'll pay for
def enter_LGA_queue(self, time):
self.queue_start_time = time
def finish_LGA_queue(self, time):
queue_time = time - self.queue_start_time
self.waiting_time += queue_time
return queue_time
def assign_pickup(self, rider, pickup_time, trip_money=30):
"""
Assigns a pickup request
:param rider:
:param pickup_time:
:param trip_money:
:return:
"""
# check if free
# from self.busy and by timing of pickups
if self.busy == 0: #
# set pickup
self.perform_pickup(pickup_time, trip_money)
else:
raise StandardError("Taxi is Busy!") # TODO: Deal with busy cabs
def perform_pickup(self, pickup_time, trip_money=30):
"""
Perform pickup
:param rider:
:param pickup_time:
:param trip_money:
:return:
"""
self.num_riders += 1 # num of current riders
self.pickup_time = pickup_time
# self.riders = [rider] #TODO not sure if useful
# self.destination = rider.destination
# if self.destination == None and riders != []:
# self.destination = riders[0].destination
self.earnings += trip_money
class TaxiStand_Controller():
def __init__(self, n_rider_arrivals=100, n_dropoffs=10, end_time=60,
total_taxis=200, strategy=1, queue_min_buffer=3, tolerance_proportion = 0.1,
rate_coefficient = 1, trip_price=30, show=True):
"""
:param n_rider_arrivals (int): Total num rider appearances/requests from LGA per timespan (default: hour)
:param n_dropoffs (int): Total num random dropoffs at LGA per timespan (default: hour)
:param end_time (int): total simulation time. default: hour.
:param total_taxis (int): total number of taxis in our fleet
:param strategy (int): choose strategy order number to use: 1 or 2
:param queue_min_buffer: (int/float) for strategy 1: approximate desired avg minutes for taxi to wait in queue,
to be used in calculating ideal number of taxis in queue
:param tolerance_proportion: (float) for Strategy 1: percent deviation from optimal taxi number to tolerate before
:param rate_coefficient: (float) for Strategy 2: coefficient to multiply the postulated rate of cars to send, to be optimzed
:param trip_price: (int) 30$ per hour in the simulated hours
:param show: (bool) whether or not to print results
"""
# Time boundary and initialization
self.time = 0
self.end_time = end_time
# Events Distribution Parameters
# TODO: get these params from NYC TAXI data
self.n_dropoffs = n_dropoffs
self.n_rider_arrivals = n_rider_arrivals
self.times_dropoffs = None
self.times_arrivals = None
self.trip_time_mean = 30
self.trip_time_sd = 15
# STRATEGIES
self.strategy = strategy
# STRATEGY 1 parameters
# compute optimal queue size
self.queue_min_buffer = queue_min_buffer
self.optimal_taxis = self.get_optimal_LGA_queue_size() if self.strategy == 1 else 0
# set tolerance of how many cars to allow away from optimal
self.tolerance_to_optimal_taxis = max(1, tolerance_proportion * self.optimal_taxis) if self.strategy == 1 else 0
# STRATEGY 2 parameters
self.rate = 0
self.rate_coefficient = rate_coefficient
# Setting Event Queue
self.schedule = [] # Event Queue: heapq with event times as keys and events as values
# each event will be stored as this list format: [execution_time, event_function, list_of_arguments]
# using heappush(self.schedule,[execution_time,func,[args_list]])
self.current_event = None # the event being currently handled
# Logging events
self.df = pd.DataFrame() # df of metrics
self.show = show
# LGA Waiting Taxis Queue
initial_LGA_taxis = self.n_rider_arrivals // 10 # to have enough for the first 6 min
self.LGA_taxis_queue = [0] * initial_LGA_taxis # simple list of queue start times for taxis waiting at LGA
self.LGA_taxis_queue_len_history = [[0.0,initial_LGA_taxis]] # this will be history of: [time, taxi queue length]
self.on_way_to_LGA = [] # list of arrival times at LGA queue of random dropoffs and cars we send
self.weight_for_onway = 0.3 # weight coefficient to discount cars on the way to LGA when calculating queue size
self.MN_taxis = total_taxis - initial_LGA_taxis # counter of taxis in Manhattan
# METRICS and SERVICE QUALITY
self.riders_count = 0
self.riders_wait_times = []
self.churned = 0 # riders who left because ride was too har (too long ETA)
self.pickedup_riders = 0
self.dropoffs_LGA_occured = 0
# Payments, costs
self.driver_queueing_times_at_LGA = [] # times taxis waited queueing in LGA
self.driving_idle_time_total = 0 # idle trip time we'll pay for
self.driving_to_pickup_time_total = 0 # time spent driving to pickup
self.driving_with_riders_time_total = 0 #time spent driving with a rider in the car
self.trip_price = trip_price # currently, flat fair. later, have variable trip price based on Time of Day, Distance, +1. #TODO
self.wage_hourly = 30 #flat hourly wage
self.costs = 0
self.earnings = 0
self.score_cost = 0 # COST FUNCTION TO BE EVENTUALLY MINIMIZED
def draw_ride_times(self,riders_expected=600,n_peaks=5,sd_ratio=0.01,plot=False):
"""
Draws arrival / dropoff times at LGA per hour
From a Gaussian Mixture: specifying total riders per hour, number of peaks,
and SD / scale of spread per peak, this imputes a normal (Gaussian) distribution
around each peak time and mixes the them together
:param riders_per_hour: (int) number of riders arriving within the hour
:param n_peaks: number of distinct large peaks within the hour (corresponding num of flights)
:param sd_ratio: Standard deviation / scale of variance (spread) around the mean
This should be significantly wider for dropoffs
because of people's various time planning habits
whereas for landings (pickups), people exit more closely together
:return: The total list of arrival/dropoff times within an hour (from 0-60)
"""
# divide peak times (n_flights) currently uniformly along the hour
flights_peaks = np.sort(np.random.uniform(low=0,high=self.end_time,size=n_peaks))
# how many riders total and per bump
riders_per_peak = int(riders_expected / n_peaks)
riders_total = []
for mu in flights_peaks:
# generate normal distribution of passengers around that peak.
# distribution around each peak, remainder out of 60 (forces to be between 0-60)
rider_partial_dist = np.mod(np.random.normal(loc=mu, scale=sd_ratio*riders_per_peak, size=riders_per_peak),self.end_time) #% 60
riders_total.extend(rider_partial_dist)
if plot:
print "\n\n\nTotal Distribution"
sns.distplot(riders_total,bins=100,rug=True,kde=True).set_title("Rider arrivals within one hour")
return riders_total
def initialize_events(self):
"""
Initialize all simulation events, schedule them into the Event Queue schedule.
:return:
"""
# Get all dropoff times
# assuming dropoffs have a similar distribution but wider and less spiky
# using self.end_time//10 as number of peaks, assuming that planes leave on average every 10 min
self.times_dropoffs = self.draw_ride_times(self.n_dropoffs,n_peaks=self.end_time//10,sd_ratio=0.4)
# Get all rider arrival times
self.times_arrivals = self.draw_ride_times(self.n_rider_arrivals,n_peaks=self.end_time//10,sd_ratio=0.05)
# schedule all dropoffs into event queue at corresponding times
for dropoff_time in self.times_dropoffs:
heappush(self.schedule,[dropoff_time-self.trip_time_mean,self.dropoff_LGA,[]])
# schedule all rider_arrivals (requests) into event queue at corresponding times
for arrival_time in self.times_arrivals:
heappush(self.schedule,[arrival_time,self.rider_arrives,[]])
if self.LGA_taxis_queue_len_history:
self.previous_queue_len_history = self.LGA_taxis_queue_len_history
self.LGA_taxis_queue_len_history = []
# Strategies:
# Strategy 1 is handled in-time, only when changing the queue size
# Strategy 2: send taxis at constant rate - schedule all these events now
if self.strategy == 2:
self.strategy2_send_constant_rate()
# Handle nice printed explanations about time
if self.show:
print("Started Planning for this hour in the previous hour and sending cars into LGA since ~30 minutes prior. This shows as negative time for an hour")
heappush(self.schedule,[0.00,self.announce_start_time,[]])
def announce_start_time(self):
if self.time == 0 and self.show: print "\n---------SIMULATION OFFICIALLY STARTED: RIDERS START ARRIVING--------\n--time = 0.00--"
def get_optimal_LGA_queue_size(self):
"""
One time run function to determine optimal queue size
:return: (int) optimal number of Taxis to have at LGA queue
"""
needed_taxis_overall = self.n_rider_arrivals - self.n_dropoffs
cars_per_minute = needed_taxis_overall/float(self.end_time)
# Queue_min_buffer: parameter of ideal queue waiting time on average
self.optimal_taxis = cars_per_minute * self.queue_min_buffer
return self.optimal_taxis
def print_schedule(self):
print("\nEVENT QUEUE SCHEDULE:")
schedule_copy = [i for i in self.schedule]
for i in range(len(schedule_copy)):
time, func, args = heappop(schedule_copy)
print(" at {:06.3f} - {} , args: {}".format(time, func.__name__, args))
print
#def get_next_LGA_dropoff_time(self):
# return self.time + scipy.random.exponential(self.LGA_dropoffs_rate)
def get_trip_time(self, mean = self.trip_time_mean, sd = self.trip_time_sd):
"""
Draw a random trip time.
#TODO: Base this on actual distribution of trip times.
:return:
"""
# TODO: Implement better trip time distribution
return abs(np.random.normal(mean, sd)) # PARAMETER
def dropoff_LGA(self):
"""
Randomly, (with specified frequency), a taxi will have a dropoff at LGA.
This function schedules the next dropoff to LGA
"""
# take a car from manhattan, drive it to LGA
self.MN_taxis -= 1 # remove car from Manhattan
trip_duration = self.trip_time_mean
arrival_time_LGA = self.time + trip_duration
# Record taxi in the "on the way to LGA" queue: push into queue of taxis on the way to LGA (recorded as arrival time)
heappush(self.on_way_to_LGA ,arrival_time_LGA)
# PUSH INTO EVENTS QUEUE: put car into LGA Queue when arriving there
heappush(self.schedule, [arrival_time_LGA, self.taxi_enters_LGA_queue, [arrival_time_LGA]])
self.driving_with_riders_time_total += trip_duration # record driving time
if self.show: print("{:06.3f}: LGA DROPOFF scheduled at {:06.3f}".format(self.time, arrival_time_LGA))
self.dropoffs_LGA_occured += 1 # document dropoff to be
return
def taxi_enters_LGA_queue(self, current_time):
"""
Taxi enters waiting queue at LGA, records dropoff and queue start time
"""
if self.on_way_to_LGA:
popped_time = heappop(self.on_way_to_LGA)
# # debugging
# if self.show and abs(popped_time - current_time) > 1.0:
# print("****Popped taxi time {:06.3f} arriving to LGA is different than the current time {:06.3f}".format(popped_time, current_time)) # for debugging; solved
# else:
# if self.show: print("****NO CARS 'ON THE WAY' TO LGA; but taxi just entered queue") #debugging
self.LGA_taxis_queue.append(current_time) # insert the CURRENT time it just entered at
self.record_taxi_queue_size()
# Adjust queue size: if too large, send someone away
self.strategy1_adjust_LGA_queue()
def taxi_enters_MN(self):
""" Taxi arrives at Manhattan; increasing MN taxis counter"""
self.MN_taxis += 1
#def get_next_rider_time(self):
# return self.time + scipy.random.exponential(self.rider_arrival_rate)
def strategy1_adjust_LGA_queue(self):
"""
Strategy 1:
Keep an approximate number of cars at the LGA queue,
As a function of the expected number of riders per hour
and the expected number of dropoffs per hour
Checks current number of Taxis in LGA Queue and on the way
Sends more taxis if number is too low
:return:
"""
# only operate if selected strategy is this one
if self.strategy != 1: return
# Decision function
lacking_taxis = self.optimal_taxis - (len(self.LGA_taxis_queue) + self.weight_for_onway*len(self.on_way_to_LGA)) #positive if we have less taxis than needed
if lacking_taxis > self.tolerance_to_optimal_taxis:
# if we have too few taxis, Send more in!
self.send_tx_to_LGA()
if self.show: print("{:06.3f}: SENDING CAR TO LGA: queue had {} taxis and {} on the way, while optimal is {}".format(
self.time, len(self.LGA_taxis_queue),len(self.on_way_to_LGA), round(self.optimal_taxis,2)))
excess_proportion_allowed = 3 # parameter manually decided upon, to be refined later on
if lacking_taxis < -self.tolerance_to_optimal_taxis * excess_proportion_allowed :
# if we have MUCH too many taxis (twice as much buffer, because if they already stay it's better to sit and stay)
self.send_tx_away_to_MN()
return
def strategy2_send_constant_rate(self):
"""
Strategy 2:
Send taxis at a constant rate, based on predicted:
n_arrivals per hour
:return:
"""
if self.strategy != 2: return
needed_taxis = self.n_rider_arrivals - self.n_dropoffs
self.rate = self.rate_coefficient * float(self.end_time)/needed_taxis # TESTING COEFFICIENT to weight rate
print("\n\n\n~~~~~~SENDING TAXIS EVERY {} MIN CONSTANTLY FOR {} MIN~~~~~~\n\n\n".format(round(self.rate,2),self.end_time))
# could also do with np.linspace but wanted to use RATE
taxi_arrival_times = np.arange(start=0,stop=self.end_time,step=self.rate)
# incorporating uncertainty in "prediction" and translation:
# assuming same mean trip time for all cars and sending cars same mean time beforehands
for taxi_arrival_time in taxi_arrival_times:
# schedule the SENDING of a taxi an expected mean trip_time before when we'd need it
heappush(self.schedule, [taxi_arrival_time-self.trip_time_mean, self.taxi_enters_LGA_queue, [taxi_arrival_time]])
return
def send_tx_to_LGA(self):
"""
FORCE SEND to go wait at LGA queue
"""
# take a car from manhattan, drive it to LGA
self.MN_taxis -= 1 # remove car from Manhattan
trip_duration = self.get_trip_time()
arrival_time_LGA = self.time + trip_duration
# Record arrival time in queue of taxis on the way to LGA
heappush(self.on_way_to_LGA ,arrival_time_LGA)
# PUSH INTO EVENTS QUEUE: put car into LGA Queue when arriving there
heappush(self.schedule, [arrival_time_LGA, self.taxi_enters_LGA_queue, [arrival_time_LGA]])
# record idle driving time
self.driving_idle_time_total += trip_duration
if self.show: print("{:06.3f}: SENT CAR TO LGA, arriving at {:06.3f}".format(self.time, arrival_time_LGA))
return
def send_tx_away_to_MN(self):
"""
FORCE SEND AWAY taxi from LGA queue to Manhattan
"""
time_away_from_LGA = 0
# 1. try first sending car ON THE WAY to LGA back to MN
if self.on_way_to_LGA:
# if there are cars on the way to LGA, send the furthest one back
# because the Heapq structure doesn't have a direct
listsorted = sorted(self.on_way_to_LGA)
arrival_time_at_LGA = listsorted.pop()
time_away_from_LGA = abs(arrival_time_at_LGA - self.time)
heapify(listsorted)
self.on_way_to_LGA = listsorted
if self.show: print("{:06.3f}: Sending car from On_the_way_to_LGA back to MN, which was planned to arrive at {:06.3f}".format(self.time, arrival_time_at_LGA))
# 2. If not, remove the last car from LGA queue
else:
# pop last taxi in LGA queue
queue_start_time = self.LGA_taxis_queue.pop(-1)
# record waiting time in vain :(
waiting_time = self.time - queue_start_time
self.driver_queueing_times_at_LGA.append(waiting_time)
self.record_taxi_queue_size()
if self.show: print("{:06.3f}: Sending car from LGA_Taxi_Queue back to MN, after it waited for {:06.3f} min".format(self.time, waiting_time))
# get trip duration back to MN
trip_duration = abs(self.get_trip_time() - time_away_from_LGA) # if the car is midway between MN and LGA, trip is shorter
# schedule arrival to MN
arrival_time_LGA = self.time + trip_duration
heappush(self.schedule,[arrival_time_LGA,self.taxi_enters_MN,[]])
# record (future) idle trip time
self.driving_idle_time_total += trip_duration
if self.show: print("{:06.3f}: SENT CAR AWAY FROM LGA QUEUE TO MN, scheduled at {:06.3f}".format(self.time, arrival_time_LGA))
return
def record_taxi_queue_size(self):
"""
Records current Taxi Queue size at LGA in history
"""
self.LGA_taxis_queue_len_history.append([self.time,len(self.LGA_taxis_queue)])
def rider_arrives(self):
"""
Rider requests a Taxi.
If next taxi is too long for them, they leave.
If the next taxi time is okay, they wait, and trip details are recorded.
"""
# New Rider appears and requests a ride
self.riders_count += 1
rider = Rider(self.riders_count, self.time, patience=20)
if self.show: print("{:06.3f}: RIDER {} ARRIVES and requests ride; patience {}".format(self.time, self.riders_count, rider.patience))
################## HANDLE RIDE REQUEST: ##################
################## Case 1 : LGA Queue ##################
# 1. Get a taxi from LGA waiting Taxis queue if possible
if self.LGA_taxis_queue:
# If Taxi in LGA queue, it will arrive in 1 minute
taxi_queue_start_time = self.LGA_taxis_queue.pop(0) # take the first taxi in line (index 0 in list)
taxi = Taxi(queue_start_time=taxi_queue_start_time) # initialize a Taxi
# Record Taxi waiting time and changes to queue size // I could remove Taxi object here and just use: self.time - taxi_queue_start_time #TODO
taxi_waited_time = taxi.finish_LGA_queue(self.time)
self.driver_queueing_times_at_LGA.append(taxi_waited_time)
self.record_taxi_queue_size()
taxi_arrival_interval = abs(np.random.normal(1.5, 0.5)) # Taxi will arrive from LGA queue in roughly 1-2 minutes #TODO: what's the actual time from LGA queue
if self.strategy == 1:
# STRATEGY 1 : CHECK QUEUE SIZE AND ADJUST TO REMAIN AROUND OPTIMUM
self.strategy1_adjust_LGA_queue()
if self.show: print("{:06.3f}: LGA QUEUE TAXI waited for {:06.3f} min in LGA picks up rider {} in {:06.3f} min".format(self.time, taxi_waited_time, self.riders_count, taxi_arrival_interval))
################## Case 2 : Call car from Manhatten ##################
# 2. If there's no taxi in the queue there, call a Taxi from Manhattan
else:
# rider requests a taxi and gets the next taxi time
taxi_arrival_interval = self.get_trip_time()
if taxi_arrival_interval > rider.patience:
# if next_taxi_time is too long for the rider's patience, rider leaves, ride dismissed
self.churned += 1
if self.show: print("{:06.3f}: rider {} LEFT. waiting {:.2f} min exceeded patience {:.2f} min".format(self.time, self.riders_count, taxi_arrival_interval ,rider.patience))
# Rider leaves. this ends execution ends. moves to next event.
return
else:
# if the taxi is within rider's patience, assign taxi and record trip
taxi = Taxi()
self.MN_taxis -= 1 # reduce counter of taxis in Manhattan, since we've taken out a taxi
if self.show: print("{:06.3f}: rider {} gets car from Manhattan coming in {:.2f} min, below patience {}".format(self.time, self.riders_count, taxi_arrival_interval ,rider.patience))
##################################################
# Assign pickup
pickup_time = self.time + taxi_arrival_interval
taxi.assign_pickup(rider, pickup_time)
taxi.busy = True
# PUSH SCHEDULED PICKUP to Event Queue
# heappush(self.schedule, [pickup_time, taxi.perform_pickup, [pickup_time]])
if self.show: print("{:06.3f}: scheduled pickup for rider {} at {:06.3f} min".format(self.time, self.riders_count, pickup_time))
# PUSH arrival to Manhattan to trip_end time
trip_duration = self.get_trip_time()
trip_end_time = pickup_time + trip_duration # rider home trip time as some function of their destination #TODO: reasonable distriubiton
# example function suggestion: trip_duration*0.5*rider.destination
heappush(self.schedule, [trip_end_time , self.taxi_enters_MN, [] ])
if self.show: print("{:06.3f}: trip will end for rider {} after {} at {:06.3f} min".format(self.time, self.riders_count, trip_duration ,trip_end_time))
# RECORD INFO: Collect waiting times, prices
self.riders_wait_times.append(taxi_arrival_interval)
self.driving_to_pickup_time_total += taxi_arrival_interval
self.driving_with_riders_time_total += trip_duration
self.pickedup_riders += 1
self.earnings += self.trip_price # paying the driver / us
def calc_costs(self):
"""
This calculates actual "idle" costs - costs of unproductive idle times, discounted by the profits
:return: costs (float) for the whole simulation
"""
self.costs = self.wage_hourly * (self.driving_idle_time_total + np.sum(self.driver_queueing_times_at_LGA))/60.0
#profit = self.earnings - wages
return self.costs
def calc_minimization_function(self):
"""
THIS IS THE COST FUNCTION WE WANT TO OPTIMIZE BY MINIMIZATION:
This is a weighted function of the costs of paying drivers in idle queueing times, idle driving times,
*driver minutes are worth $0.5 because we pay them 30$ per 60 min
Added weight for the rider queueing time
And adding cost of trip_price for each churned rider (since we lost paying that)
rider ride minutes are worth $1 because they pay 30$ for 30min average trip time (60$ hourly)
:return: (float) composite value to minimize
"""
self.score_cost = (self.driving_idle_time_total + np.sum(self.driver_queueing_times_at_LGA))/2.0 +\
np.sum(self.riders_wait_times) + (self.trip_price * self.churned)
return self.score_cost
def run_simulation(self):
"""
Simulates a Taxi Line Requests at an hour with particular parameters.
Initialized and runs through the simulation.
This runs through the event schedule and implements the events one by one
:return:
"""
# Initialize first events into schedule
self.initialize_events()
# RUN THROUGH SIMULATION!
while self.time < self.end_time:
# for simulation duration
# POP FROM SCHEDULE
time, eventfunc, args = heappop(self.schedule)
self.current_event = [eventfunc.__name__, args] # for logging and debugging #TODO: remove if no need
# STOP if next time is above simulation time
if time > self.end_time: break
# ADVANCE TIME TO THAT TIME
self.time = time
# EXECUTE EVENT, WHATEVER EVENT IT IS
if self.show: print("\n{:06.3f}: executing: {}, with args: {}".format(self.time, eventfunc.__name__, args))
eventfunc(*args) # EXECUTING EVENT
def metrics(self):
"""
Record and print final plots
"""
metrics = OrderedDict([
# STRATEGY
('strategy', self.strategy),
('s1_queue_min_buffer', self.queue_min_buffer),
('s1_optimal_taxis_n', self.optimal_taxis),
('s1_tolerance', self.tolerance_to_optimal_taxis),
('s2_rate_coefficient', self.rate_coefficient),
('s2_rate', self.rate),
# SETTINGS
('time', self.time),
('riders_initialized', self.n_rider_arrivals),
('riders_so_far', self.riders_count),
('dropoffs_initialized', self.n_dropoffs),
('dropoffs_LGA_occured', self.dropoffs_LGA_occured),
('pickedup', self.pickedup_riders),
('churned', self.churned),
('LGA_tx_queue_mean', np.mean(self.LGA_taxis_queue_len_history)),
('LGA_tx_queue_median', np.median(self.LGA_taxis_queue_len_history)),
('LGA_tx_queue_std', np.std(self.LGA_taxis_queue_len_history)),
('MN_taxis', self.MN_taxis),
# METRICS and SERVICE QUALITY
('riders_waits_mean', np.mean(self.riders_wait_times)),
('riders_waits_median', np.median(self.riders_wait_times)),
('riders_waits_std', np.std(self.riders_wait_times)),
('driver_queueing_mean', np.mean(self.driver_queueing_times_at_LGA)),
('driver_queueing_median', np.median(self.driver_queueing_times_at_LGA)),
('driver_queueing_std', np.std(self.driver_queueing_times_at_LGA)),
('driving_idle_time_total', self.driving_idle_time_total ),
('driving_to_pickup_time_total', self.driving_to_pickup_time_total),
('driving_with_riders_time_total', self.driving_with_riders_time_total ),
#self.trip_price, trip_price # currently), flat fair. later), have variable trip price based on Time of Day), Distance), +1. #TODO
('costs', self.calc_costs()),
('earnings', self.earnings),
('profit', self.earnings - self.costs),
('score_cost', self.calc_minimization_function()),
])
# Print dictionary in new lines
if self.show:
for i in metrics: print i, ": ", metrics[i]
# log this in DataFrame
row = pd.DataFrame.from_dict(metrics, orient='index').T
self.df = self.df.append(row)
return row
def plots(self):
"""
Show final metric plots
"""
# For title:
if self.strategy == 1:
title_context = " | Strategy 1: q_min_buffer={} (opt_taxis={})".format(round(self.queue_min_buffer),round(self.optimal_taxis))
elif self.strategy == 2:
title_context = " | Strategy 2: weight={} (rate={})".format(round(self.rate_coefficient,2),round(self.rate,2))
# PLOTS
sns.distplot(self.times_arrivals,bins=self.n_rider_arrivals/5,rug=True,kde=True).set_title("Rider arrivals within {} min".format(self.end_time))
plt.xlim(0,self.end_time) ; plt.show()
sns.distplot(self.times_dropoffs,bins=self.n_dropoffs/5,rug=True,kde=True).set_title("Dropoffs within {} min".format(self.end_time))
plt.xlim(0,self.end_time) ; plt.show()
# PLOT LGA Queue History
taxi_queue_history = np.array(self.LGA_taxis_queue_len_history).T
plt.plot(taxi_queue_history[0], taxi_queue_history[1])
plt.xlabel("Time (Minutes)") ; plt.ylabel("Number of Taxis in Queue at that minute")
plt.title("Taxis Queue Size in LGA over time" + title_context)
plt.xlim(0,self.end_time) ; plt.show()
# Plot waiting times distribution
sns.distplot(self.riders_wait_times,bins=len(self.riders_wait_times)/5,rug=True,kde=True).set_title("Histogram: Riders Waiting Times" + title_context)
plt.xlim(0,) ; plt.show()
sns.distplot(self.driver_queueing_times_at_LGA,bins=len(self.driver_queueing_times_at_LGA)/5,rug=True,kde=True).set_title("Histogram: Taxis LGA Queueing Times" + title_context)
plt.xlim(0,) ; plt.show()
# sim = TaxiStand_Controller(n_rider_arrivals=80, n_dropoffs=30, end_time=30)
# sim = TaxiStand_Controller()
# sim.run_simulation()
# print "sim.riders_count: {}, sim.taxis_count: {}, sim.pickedup_riders: {}".format(sim.riders_count, sim.taxis_count, sim.pickedup_riders)
# print "sim.trips: ", sim.trips
# Sample case (with relatively fewer rides, for printing)
sim = TaxiStand_Controller(n_rider_arrivals=50, n_dropoffs=10,
end_time=10,total_taxis=50,
strategy=1, queue_min_buffer=5,
tolerance_proportion = 0.1,
trip_price=30, show=1)
sim.run_simulation()
sim.plots()
sim.metrics().T
#sim = TaxiStand_Controller(n_rider_arrivals=10, n_dropoffs=3, end_time=10)
#sim.run_simulation()
#sim.plots()
n_repetitions = 100
# df = pd.DataFrame()
print("================ Testing Queueing Minutes Buffer For Strategy 1 ===============")
for buffer_min in range(4,20):
for rep in range(n_repetitions):
# for each buffer_minutes tested,
# repeat same experiment n_repetitions times to reduce uncertainty
sim = TaxiStand_Controller(n_rider_arrivals=1000, n_dropoffs=100,
end_time=100,total_taxis=500,
strategy=1, queue_min_buffer=buffer_min,
tolerance_proportion = 0.1,
trip_price=30, show=0)
sim.run_simulation()
if rep == 1:
print("\n----------SIMULATING STRATEGY 1 WITH BUFFER {} MIN------------\n\n".format(buffer_min))
sim.plots() # present plots only with first iteration
df = df.append(sim.metrics())
# Backup and Save Results
dfcopy = df
df.to_csv("results_strategy1.csv")
df.T # Show database in a more convenient way: Transformed so that variables are rows instead of columns (for A4)
#sns.regplot('obs', 'mod', data=df, lowess=True, ci=95, n_boot=1000)
plt.figure(figsize=(10,5));
print("number of observations: {}".format(len(df)))
sns.regplot(x="s1_queue_min_buffer", y="score_cost", data=df,
x_estimator=np.mean, lowess=True, ci=95, n_boot=100,
label="Cost Function", color="red")
sns.regplot(x="s1_queue_min_buffer", y="profit", data=df,
x_estimator=np.mean, lowess=True, ci=95,n_boot=100,
label="Profit", color="green").set_title("Strategy 1: Cost function (red) and Profit (green) by Rate Coefficient")
plt.show();
sns.regplot(x="s1_queue_min_buffer", y="churned", data=df,
x_estimator=np.mean, lowess=True, ci=95,n_boot=100,
color="red")
sns.regplot(x="s1_queue_min_buffer", y="pickedup", data=df,
x_estimator=np.mean, lowess=True, ci=95,n_boot=100,
color="green").set_title("Strategy 1: Churned (Red) and Picked-Up (Green) riders by Rate Coefficient")
plt.show();
sns.regplot(x="s1_queue_min_buffer", y="riders_waits_mean", data=df,
x_estimator=np.mean, lowess=True, ci=95, color = "red",
n_boot=100)
sns.regplot(x="s1_queue_min_buffer", y="driver_queueing_mean", data=df,
x_estimator=np.mean, lowess=True, ci=95, color = "green",
n_boot=100).set_title("Strategy 1: Rider (red) and Driver (green) Average Wait Times by Rate Coefficient")
plt.show();
Optimal queue waiting minutes buffer was found around 8.00, in this case an optimal queue of 72 Taxis in the queue at any given moment for 1000 riders and 100 random dropoffs over 100 minutes.
#sim = TaxiStand_Controller(n_rider_arrivals=10, n_dropoffs=3, end_time=10)
#sim.run_simulation()
#sim.plots()
df2 = pd.DataFrame()
print("============Testing rate coefficient of Strategy 2==============")
for weight in [0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5]:
sim = TaxiStand_Controller(n_rider_arrivals=1000, n_dropoffs=100,
end_time=100,total_taxis=500,
strategy=2, # TESTING STRATEGY 2
rate_coefficient = weight, # TESTING RATE COEFFICIENT
trip_price=30, show=0)
print("\n\n----------SIMULATING STRATEGY 2 \
WITH RATE COEFFICIENT {}------------\n\n".format(weight))
sim.run_simulation()
sim.plots()
row = sim.metrics()
df = df.append(sim.metrics())
dfcopy2 = df2
df2.to_csv("results_strategy2A.csv")
df2.T
# REFINING VALUES
# Continuing the same DF2 , not creating a new one
print("============Testing rate coefficient of Strategy 2==============")
n_repeats = 100
for weight in np.arange(0.8,3,0.1):
for i in range(n_repeats):
sim = TaxiStand_Controller(n_rider_arrivals=1000, n_dropoffs=100,
end_time=100,total_taxis=500,
strategy=2, # TESTING STRATEGY 2
rate_coefficient = weight, # TESTING RATE COEFFICIENT
trip_price=30, show=0)
sim.run_simulation()
if i == 0: #show only first time for each parameter tested, not every repitition
print("\n\n----------SIMULATING STRATEGY 2 WITH RATE COEFFICIENT {}------------\n\n".format(weight))
sim.plots()
df2 = df2.append(sim.metrics())
# Backup and Save Results
dfcopy2 = df2
df2.to_csv("results_strategy2B.csv")
df2.T # Show database in a more convenient way: Transformed so that variables are rows instead of columns (for A4)
The Best rate coefficient was 1.2. We can test more closely around the range of 1 - 2. Below a coefficient of 1 there was an excess of taxis (obviously from the formula). This was included just to proof-test and show trend.
df2.head(10).T
df2
# df2 = df2[df2.s2_rate_coefficient != 1.25] # there were outliers due to manual variations in other parameters, I here removed those 4 outliers
# df2 = df2[12:] removed initial outliers
#sns.regplot('obs', 'mod', data=df2, lowess=True, ci=95, n_boot=1000)
plt.figure(figsize=(10,5))
print("number of observations: {}".format(len(df2)))
sns.regplot(x="s2_rate_coefficient", y="score_cost", data=df2,
x_estimator=np.mean, lowess=True, ci=95, #n_boot=100,
label="Cost Function", color="red").set_title("Strategy 2: Cost Function Score by Rate Coefficient")
sns.regplot(x="s2_rate_coefficient", y="profit", data=df2,
x_estimator=np.mean, lowess=True, ci=95,#n_boot=100,
label="Profit", color="green").set_title("Strategy 2: Profit by Rate Coefficient")
plt.show();
sns.regplot(x="s2_rate_coefficient", y="churned", data=df2,
x_estimator=np.mean, lowess=True, ci=95,#n_boot=100,
color="red").set_title("Strategy 2: Churned Riders by Rate Coefficient")
sns.regplot(x="s2_rate_coefficient", y="pickedup", data=df2,
x_estimator=np.mean, lowess=True, ci=95,#n_boot=100,
color="green").set_title("Strategy 2: Churned (Red) and Picked-Up (Green) riders by Rate Coefficient")
plt.show();
sns.regplot(x="s2_rate_coefficient", y="riders_waits_mean", data=df2,
x_estimator=np.mean, lowess=True, ci=95,
n_boot=100).set_title("Strategy 2: Rider Average Wait Times by Rate Coefficient")
plt.show();
df2[df2.s2_rate_coefficient>1.2]